Goals:

  1. Produce visuals of NOTAM frequencies by date.
  • Both daily fluctuations over time and frequency distributions of counts of NOTAMs by date
  2. Analyze which combination of predictors (including season and weekend/weekday) best capture the variation in the NOTAM count data.
  • Produce table of busiest combinations of season and weekday/end, with statistical support for differences between groups.
  3. Detect and analyze how intra-day variation can contribute to understanding NOTAM counts by date.

Daily variation

# Here we use the dplyr syntax of '%>%' to chain together multiple steps. We make a new data frame called 'df', based on an aggregation of the data in UniqueInteractions. dplyr is a package included in the 'tidyverse' set of packages. 

# Season: First define months from date time, then manually code months into seasons with a series of if/else statements nested together.

# Aggregate UniqueInteractions to one row per date, carrying month, season,
# and a weekend indicator; 'count' is the number of NOTAMs on that date.
# %in% replaces the original chains of '==' comparisons joined with '|'.
df <- UniqueInteractions %>% 
  mutate(weekend = as.factor(ifelse(dow %in% c('Saturday', 'Sunday'), 1, 0)),
         quarter = as.factor(quarters(datetimes)),
         month   = format(date, '%m'),
         # Map months to meteorological seasons; NA month stays NA season
         season  = as.factor(ifelse(month %in% c('12', '01', '02'), 'Winter',
                                    ifelse(month %in% c('03', '04', '05'), 'Spring',
                                           ifelse(month %in% c('06', '07', '08'), 'Summer',
                                                  ifelse(month %in% c('09', '10', '11'), 'Fall', NA)))))) %>% 
  group_by(date, month, season, weekend) %>% 
  summarize(count = n())

Plotting

For plotting, the ggplot2 package included in the tidyverse is quick and flexible. There is a similar chaining idea as in dplyr, but instead of ‘%>%’, the syntax is simply ‘+’. The main ggplot developer has expressed regret that ggplot doesn’t use the same chaining syntax.

1. Count of NOTAMs by date

# 1. Scatter of daily NOTAM counts over the study period with a smoothed trend
g1 <- ggplot(df, aes(x = date, y = count)) +
  geom_point(color = scales::alpha('midnightblue', 0.2)) + # semi-transparent points
  geom_smooth(span = 0.1) +                                # loess trend through the points
  theme_bw() +                                             # clean black-and-white theme
  labs(x = 'Date',
       y = "NOTAM counts",
       title = "Count of NOTAMs by date over the study period")

# Write the figure to the working directory
ggsave(plot = g1, filename = "NOTAM_Freq_timeline.jpg", width = 6, height = 5)

# Interactive version of the same figure
ggplotly(g1)

Another version, with separate lines for weekend and weekday:

# Relabel the weekend factor for display. The factor levels sort as
# c('0', '1'), so position 1 = weekday (0) and position 2 = weekend (1).
we.labs <- c("Weekday", "Weekend") # 0 = *weekday*, see logic in the formatting above
levels(df$weekend) <- we.labs

# Same timeline, but colored and smoothed separately for weekday vs weekend
g1.1 <- ggplot(df, aes(x = date, y = count, color = weekend)) +
  geom_point(alpha = 0.5) +
  geom_smooth(span = 0.1) +                    # one trend line per group
  theme_bw() +
  scale_color_discrete(name = "Day of Week") + # legend title
  labs(x = 'Date',
       y = "NOTAM counts",
       title = "Count of NOTAMs by date over the study period \n Separating by weekend ")

# Save to the working directory, then show an interactive version
ggsave(plot = g1.1, filename = "NOTAM_Freq_timeline_we.jpg", width = 6, height = 5) 

ggplotly(g1.1)

2. Histograms by season + weekend/weekday

This figure shows the distribution of the count of NOTAMs by date. Each bar represents a range of NOTAM counts, and the height of bar indicates number of days with that many NOTAMs. The vertical colored lines indicate the mean count of NOTAMs for that combination of season and weekend/weekday.

# Mean NOTAM count for every season x weekend/weekday combination; used for
# the vertical reference lines on the histograms below
df_mc <- df %>%
  group_by(season, weekend) %>%
  summarise(mean_count = mean(count))

# Faceted histograms of daily NOTAM counts: one panel per season, fill by
# weekend/weekday, with a vertical line at each group's mean count.
g2 <- ggplot(df, aes(count, fill = weekend, group = weekend)) +      # Within panels, group by weekend
  geom_histogram(color = "grey20",                                   # Create histograms
                 bins = 50) +
  geom_vline(aes(xintercept = mean_count,
                 color = weekend), data = df_mc) +
  facet_wrap(~season) +                                              # Make this a multi-panel figure by season
  scale_fill_discrete(name = "Day of Week") +                        # Set the name for the legend
  guides(color = "none") +                                           # Suppress legend for vertical lines; "none" replaces the deprecated logical FALSE
  theme_bw() +
  ylab("Count of days") + xlab("Count of NOTAMs") +
  ggtitle("Histograms of NOTAM counts by day")

# Add x/y positions for the text annotations placed on each vertical line
df_mc <- df_mc %>%
  mutate(y = 30,
         x = mean_count)

# Annotate each vertical line with its rounded mean count, then display
g3 <- g2 + geom_text(data = df_mc, 
               mapping = aes(x = x, y = y, 
                             label = round(mean_count, 0),
                             color = weekend),
               size = 3,
               hjust = 1); g3

ggsave(plot = g3, filename = "NOTAM_Freq_histograms.jpg", width = 6, height = 5)            

Analysis

# Additive two-way ANOVA: daily NOTAM count ~ season + weekend/weekday
m1 <- aov(count ~ season + weekend,
          data = df)
summary(m1) # Yes, both season and weekend matter
##              Df   Sum Sq  Mean Sq F value Pr(>F)    
## season        3 10636805  3545602    76.7 <2e-16 ***
## weekend       1 23928932 23928932   517.6 <2e-16 ***
## Residuals   711 32868411    46228                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Same model with the season x weekend interaction added ('*' expands to
# main effects plus interaction)
m2 <- aov(count ~ season * weekend,
          data = df)
summary(m2) # No significant interaction between season and weekend; there are not different patterns for the combination of season and weekend.
##                 Df   Sum Sq  Mean Sq F value Pr(>F)    
## season           3 10636805  3545602  76.557 <2e-16 ***
## weekend          1 23928932 23928932 516.678 <2e-16 ***
## season:weekend   3    78746    26249   0.567  0.637    
## Residuals      708 32789665    46313                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
AIC(m1, m2) # Lower AIC for m1 confirms that this is a better model, although not a large difference
# (A prediction frame for the 8 season x weekend cases is built later, in the 'Generating estimates' section)
TukeyHSD(m1) # Summer not sig different from spring; spring not sig diff from fall
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = count ~ season + weekend, data = df)
## 
## $season
##                    diff       lwr       upr     p adj
## Spring-Fall    42.61135 -15.43261 100.65531 0.2330220
## Summer-Fall    71.05801  12.93568 129.18035 0.0092568
## Winter-Fall   318.80713 259.50305 378.11122 0.0000000
## Summer-Spring  28.44666 -29.35630  86.24962 0.5841178
## Winter-Spring 276.19578 217.20467 335.18690 0.0000000
## Winter-Summer 247.74912 188.68089 306.81735 0.0000000
## 
## $weekend
##                      diff       lwr       upr p adj
## Weekend-Weekday -405.6122 -440.6142 -370.6103     0
# Test Poisson regression (shouldn't be necessary, since numbers are large)
m3 <- glm(count ~ season + weekend,
          family = 'poisson',
          data = df)

AIC(m1, m3)
# Much worse by AIC
# NOTE(review): comparing AIC between a gaussian aov and a poisson glm is
# only meaningful if both likelihoods include all constants -- confirm.

Test including month of year. This shows no significant effect of month apart from the already-included ‘season’ variable. We also check the AIC of these two models; a lower AIC value indicates that the model is a more parsimonious characterization of the data.

# Does month add anything beyond season? Add it to the additive model.
m4 <- aov(count ~ season + weekend + month,
          data = df)
summary(m4) 
##              Df   Sum Sq  Mean Sq F value Pr(>F)    
## season        3 10636805  3545602  76.704 <2e-16 ***
## weekend       1 23928932 23928932 517.669 <2e-16 ***
## month         8   372690    46586   1.008  0.429    
## Residuals   703 32495722    46224                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
AIC(m1, m4) 
# Diagnostics for model m1:
# post hoc tests between categorical variable levels
# (outer parentheses both assign and print the result)
(m1.hsd <- TukeyHSD(m1))
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = count ~ season + weekend, data = df)
## 
## $season
##                    diff       lwr       upr     p adj
## Spring-Fall    42.61135 -15.43261 100.65531 0.2330220
## Summer-Fall    71.05801  12.93568 129.18035 0.0092568
## Winter-Fall   318.80713 259.50305 378.11122 0.0000000
## Summer-Spring  28.44666 -29.35630  86.24962 0.5841178
## Winter-Spring 276.19578 217.20467 335.18690 0.0000000
## Winter-Summer 247.74912 188.68089 306.81735 0.0000000
## 
## $weekend
##                      diff       lwr       upr p adj
## Weekend-Weekday -405.6122 -440.6142 -370.6103     0
# Plotting residuals: still some pattern
plot(resid(m1))

# Plotting standard regression diagnostics:
par(mfrow = c(2, 2)); plot(m1) # Two outlying days still stand out in the m1 diagnostics
# (the original note compared this to "m1", but these ARE m1's diagnostics)

df[c(72, 73),] # two days in December 2017 with 2891 and 2673 NOTAMs, very high.

Finally, just plotting the month-by-month frequency values for comparison, even though we see above that the season variable is sufficient to characterize the monthly differences.

# Monthly means with standard errors, for the month-by-month comparison plot
m_df <- df %>% 
  group_by(month) %>% 
  summarize(mean_count = mean(count), 
            se_count = sd(count) / sqrt(n()),   # n() = number of days in that month
            upper = mean_count + se_count,
            lower = mean_count - se_count)

# Point-and-whisker plot of the monthly mean +/- 1 s.e.
ggplot(m_df, aes(x = month, y = mean_count)) +
  geom_pointrange(aes(ymax = upper,
                      ymin = lower)) +
#  coord_polar() +                                        # Uncomment for circular plot
  theme_bw() +
  labs(y = 'Mean count of NOTAMs',
       title = "Mean count of NOTAMs by month \n +/- 1 s.e.")

Intra-day variation

Further analysis, first to identify what the peak and non-peak times of day are, and then to include these intra-day periods in the model. We will then look at whether including intra-day variation significantly improves the model performance.

Plotting

First, we need to identify what peak and non-peak times of the day are. Each point in the plot below represents the observed count of NOTAMs within one day and hour.

# Rebuild the aggregation at the day x hour level: one row per date and hour,
# with the same weekend/month/season variables as 'df' above.
# %in% replaces the original chains of '==' comparisons joined with '|'.
df_hr <- UniqueInteractions %>%
  mutate(weekend = as.factor(ifelse(dow %in% c('Saturday', 'Sunday'), 1, 0)),
         quarter = as.factor(quarters(datetimes)),
         month   = format(date, '%m'),
         season  = as.factor(ifelse(month %in% c('12', '01', '02'), 'Winter',
                                    ifelse(month %in% c('03', '04', '05'), 'Spring',
                                           ifelse(month %in% c('06', '07', '08'), 'Summer',
                                                  ifelse(month %in% c('09', '10', '11'), 'Fall', NA)))))) %>% 
  group_by(hour, date, month, season, weekend) %>% 
  summarize(count = n())   # NOTAMs within that hour of that day

# Relabel the weekend factor: levels sort as c('0', '1'), so 0 = weekday
we.labs <- c("Weekday", "Weekend") # 0 = *weekday*, see logic in the formatting above
levels(df_hr$weekend) <- we.labs

# Hourly counts by hour of day, one panel per season, colored by weekend/weekday
g4 <- ggplot(df_hr, aes(x = hour, y = count, color = weekend)) +
  geom_point(alpha = 0.4) +
  geom_smooth(span = 0.3) +
  theme_bw() +
  scale_color_discrete(name = "Day of Week") +
  labs(x = 'Hour of day',
       y = 'Count of NOTAMs by day and hour',
       title = 'Count of NOTAMS') +
  facet_wrap(~season)

# ggplotly(g4)
g4

ggsave(plot = g4, filename = "NOTAM_Freq_ToD_Season_by_day.jpg", width = 6, height = 5)

From this plot, we can see that there does not seem to be a strong seasonal pattern in the hourly fluctuation. Overall, it looks like peak for weekdays is 13:00 - 21:00. Weekends are distinctly less peaked, but same time period.

Now, we repeat this daily plot with the average daily count of NOTAMs by hour, to generalize the typical workload expected by hour and season. Each point in the plot below represents the average daily count of NOTAMs within an hour for one month.

# Average (and s.e.) of the hourly counts within each hour x month x season x
# weekend/weekday cell -- the typical workload for that hour of day
df_hr_ave <- df_hr %>%
  group_by(hour, month, season, weekend) %>%
  summarise(ave_count = mean(count),
            se_count  = sd(count) / sqrt(n()))

# Average hourly workload by hour of day, faceted by season
g5 <- ggplot(df_hr_ave, aes(x = hour, y = ave_count, color = weekend)) +
  geom_point(alpha = 0.4) +
  geom_smooth(span = 0.3) +
  theme_bw() +
  scale_color_discrete(name = "Day of Week") +
  labs(x = 'Hour of day',
       y = 'Average of NOTAMs within an hour',
       title = 'Average count of NOTAMS') +
  facet_wrap(~season)

# Interactive view, then save to the working directory
ggplotly(g5)
ggsave(plot = g5, filename = "NOTAM_Freq_ToD_Season_ave_day.jpg", width = 6, height = 5)

Focusing on just the weekday/weekend split now, plot across seasons:

# Hourly counts pooled across seasons, one panel per weekday/weekend.
# NOTE(review): this reuses the name 'g5', overwriting the seasonal-average
# plot defined just above.
g5 <- ggplot(df_hr, aes(x = hour, y = count, color = weekend)) +
  geom_point(alpha = 0.5) +
  geom_smooth(span = 0.3) +
  theme_bw() +
  scale_color_discrete(name = "Day of Week") +
  facet_wrap(~weekend)

ggplotly(g5)
ggsave(plot = g5, filename = "NOTAM_Freq_ToD_WE.jpg", width = 6, height = 5)

Now it is more clear that there is a distinct shape to the weekday peak, with a stronger early afternoon peak from 13:00-16:00 and then a shoulder from 17:00-21:00.

We’ll assess two designations of the peak: two levels with peak and off-peak, and three levels with peak, off-peak, and valley. We’ll first use 13:00 - 21:00 UTC as the peak, and all other times as off-peak. We then also designate 05:00 - 10:00 as ‘valley’ for the three-level version.

The periods are as follows:

# Designate intra-day periods. peak2: two levels (peak / off-peak);
# peak3: three levels, additionally marking the overnight 'valley'.
# between(hour, a, b) is dplyr's inclusive-range shorthand for
# hour >= a & hour <= b.
df_hr <- df_hr %>%
  mutate(peak2 = ifelse(between(hour, 13, 21), 'peak', 'off-peak'),
         peak3 = ifelse(between(hour, 13, 21), 'peak', 
                        ifelse(between(hour, 5, 10), 'valley',
                               'off-peak')))

df_hr$peak2 <- as.factor(df_hr$peak2)
# peak3 is deliberately left as character; aov() coerces it to a factor
#df_hr$peak3 = as.factor(df_hr$peak3)

# One row per hour of day showing its two- and three-level designation
hr_tab <- df_hr %>% 
  group_by(hour) %>%
  summarize(Peak_2 = unique(peak2),
            Peak_3 = unique(peak3))

knitr::kable(hr_tab, col.names = c('Hour', 'Two-level designation', 'Three-level designation'))
Hour Two-level designation Three-level designation
0 off-peak off-peak
1 off-peak off-peak
2 off-peak off-peak
3 off-peak off-peak
4 off-peak off-peak
5 off-peak valley
6 off-peak valley
7 off-peak valley
8 off-peak valley
9 off-peak valley
10 off-peak valley
11 off-peak off-peak
12 off-peak off-peak
13 peak peak
14 peak peak
15 peak peak
16 peak peak
17 peak peak
18 peak peak
19 peak peak
20 peak peak
21 peak peak
22 off-peak off-peak
23 off-peak off-peak

Plotting these periods over the average daily count plots:

# Overlay shaded rectangles marking the peak (13-21) and valley (5-10)
# periods on the average-count plot; rects are drawn first so the data
# layers sit on top of them.
g6 <- ggplot(df_hr_ave, aes(x = hour, y = ave_count, color = weekend)) +
    geom_rect(xmin = 13, xmax = 21,
            ymin = 0, ymax = 200,
            fill = scales::alpha('palegreen1', 0.05),      # peak shading
            color = scales::alpha('grey90', 0)) +          # fully transparent border
    geom_rect(xmin = 5, xmax = 10,
            ymin = 0, ymax = 200,
            fill = scales::alpha('lightblue', 0.05),       # valley shading
            color = scales::alpha('grey90', 0))+
  geom_point(alpha = 0.4) + 
  geom_smooth(span = 0.3) +
  theme_bw() +
  ylab('Average of NOTAMs within an hour') + xlab('Hour of day') +
   ggtitle('Average count of NOTAMS') +
  scale_color_discrete(name = "Day of Week") +
  facet_wrap(~season) 

# Attempt to add a legend for the shaded periods.
# NOTE(review): the rect fills above are set as constants, not mapped via
# aes(fill = ...), so this manual fill scale has nothing to attach to --
# confirm the "Intra-Day Period" legend actually renders as intended.
g7 = g6 + scale_fill_manual("Intra-Day Period", 
                       values = c(green_fill = scales::alpha('palegreen1', 0.05),
                                  blue_fill = scales::alpha('lightblue', 0.05)),
                       labels = c("Peak", "Valley"),
                       guide = guide_legend(override.aes=aes(fill=NA)))

g7

ggsave(plot = g7, filename = "NOTAM_Freq_ToD_Season_Period_Marked.jpg", width = 7, height = 5.5)           

Analysis

Here we create two variables called ‘peak2’ and ‘peak3’ and test if they are improvements over just using the hours directly.

# Baseline hourly model. NOTE(review): 'hour' enters as a single continuous
# (linear) predictor here -- the Df = 1 in the output confirms this.
m5 <- aov(count ~ season + weekend + hour,
          data = df_hr)

summary(m5)
##                Df  Sum Sq Mean Sq F value Pr(>F)    
## season          3  442167  147389   272.4 <2e-16 ***
## weekend         1  994195  994195  1837.6 <2e-16 ***
## hour            1 1733744 1733744  3204.5 <2e-16 ***
## Residuals   17153 9280349     541                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Replace hour with the two-level peak/off-peak designation
m6 <- aov(count ~ season + weekend + peak2,
          data = df_hr)

summary(m6)
##                Df  Sum Sq Mean Sq F value Pr(>F)    
## season          3  442167  147389   352.4 <2e-16 ***
## weekend         1  994195  994195  2377.2 <2e-16 ***
## peak2           1 3840474 3840474  9183.0 <2e-16 ***
## Residuals   17153 7173619     418                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Replace hour with the three-level peak/valley/off-peak designation
m7 <- aov(count ~ season + weekend + peak3,
          data = df_hr)

summary(m7)
##                Df  Sum Sq Mean Sq F value Pr(>F)    
## season          3  442167  147389   381.9 <2e-16 ***
## weekend         1  994195  994195  2576.3 <2e-16 ***
## peak3           2 4395017 2197509  5694.4 <2e-16 ***
## Residuals   17152 6619075     386                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Compare the three codings of time of day; lower AIC is better
AIC(m5, m6, m7)

Yes, the lower AIC for m6 (with peak, not hour) indicates that this better represents the variation in the data compared to the hour variable. We also want to know specifically if there are differences in this intra-day variation by season. The yet-lower AIC for m7 indicates that peak3 is a better description of the intra-day fluctuation than peak2.

# With season x peak interaction (does the peak's effect differ by season?)

m8 <- aov(count ~ season + weekend + peak2 + season:peak2,
          data = df_hr)
summary(m8)
##                 Df  Sum Sq Mean Sq F value Pr(>F)    
## season           3  442167  147389   362.7 <2e-16 ***
## weekend          1  994195  994195  2446.8 <2e-16 ***
## peak2            1 3840474 3840474  9451.6 <2e-16 ***
## season:peak2     3  205059   68353   168.2 <2e-16 ***
## Residuals    17150 6968559     406                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Adding weekend x peak interaction as well
m9 <- aov(count ~ season + weekend + peak2 + weekend:peak2 + season:peak2,
          data = df_hr)

summary(m9)
##                  Df  Sum Sq Mean Sq F value Pr(>F)    
## season            3  442167  147389   396.3 <2e-16 ***
## weekend           1  994195  994195  2673.3 <2e-16 ***
## peak2             1 3840474 3840474 10326.6 <2e-16 ***
## weekend:peak2     1  590562  590562  1588.0 <2e-16 ***
## season:peak2      3  205332   68444   184.0 <2e-16 ***
## Residuals     17149 6377725     372                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Compare the two-level interaction models against the three-level m7
AIC(m7, m8, m9)
# Using 3 period peak variable (peak3) with the season x peak interaction
m10 <- aov(count ~ season + weekend + peak3 + season:peak3,
          data = df_hr)
summary(m10)
##                 Df  Sum Sq Mean Sq F value Pr(>F)    
## season           3  442167  147389  394.38 <2e-16 ***
## weekend          1  994195  994195 2660.23 <2e-16 ***
## peak3            2 4395017 2197509 5880.02 <2e-16 ***
## season:peak3     6  211186   35198   94.18 <2e-16 ***
## Residuals    17146 6407889     374                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Adding weekend x peak interaction as well; m11 is the final model used
# for the predictions in the 'Generating estimates' section
m11 <- aov(count ~ season + weekend + peak3 + weekend:peak3 + season:peak3,
          data = df_hr)

summary(m11)
##                  Df  Sum Sq Mean Sq F value Pr(>F)    
## season            3  442167  147389   437.3 <2e-16 ***
## weekend           1  994195  994195  2949.7 <2e-16 ***
## peak3             2 4395017 2197509  6519.9 <2e-16 ***
## weekend:peak3     2  629280  314640   933.5 <2e-16 ***
## season:peak3      6  211501   35250   104.6 <2e-16 ***
## Residuals     17144 5778294     337                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Akaike Information Criterion: compare against m7 (no interactions) and
# m10 (season interaction only); lower is better

AIC(m7, m10, m11)

The model which includes both interactions (season x peak and weekend x peak) is a much better model than with just season x peak or no interactions, using peak3 as our categorization of variation within a day. This is the model we can use to generate estimates of NOTAM workload going forward.

Generating estimates

Given this model, now we can estimate the expected hourly counts of NOTAMs by season, weekend/weekday, and intra-day period.

The table below shows the model predictions for an hour within each combination of season, weekend/weekday, and intra-day period.

# Prediction frame: all 24 combinations of season (4) x weekend (2) x
# intra-day period (3). gl() builds factors with repeating patterns that
# must line up row-by-row across the three columns.
pred_dat  = data.frame(season = gl(4, 2*3, labels = levels(df_hr$season)),
                       weekend = rep(gl(2, 1, labels = levels(df_hr$weekend)), 4*3),
                       peak3 = rep(gl(3, 2, labels = unique(df_hr$peak3)), 2*2))
# NOTE(review): peak3 labels come from unique(df_hr$peak3), whose order
# depends on the row order of df_hr -- confirm it matches the gl() pattern
# and the c(9, 6, 9) hour counts below.

# Predictions (with standard errors) from the final model m11
pred11 <- predict(m11, 
                  newdata = pred_dat,
                  se.fit = T)

# Creating a variable to represent the number of hours in each of these periods; we will use this to calculate SD from SE

n_hr = pred_dat$peak3 
levels(n_hr) = c(9, 6, 9)
n_hr = as.numeric(as.character(n_hr))

# Assemble the prediction table; SD is approximated as SE * sqrt(hours)
pred <- data.frame(pred_dat,
                   Estimate = pred11$fit,
                   # StdErr = pred11$se.fit,
                   SD = pred11$se.fit * sqrt(n_hr))

# Percentiles for a normal distribution, as multiples of the SD:
#   2.5 / 97.5 -> 1.96;  5 / 95 -> 1.645;  10 / 90 -> 1.282
# We report the 5th and 95th percentiles (+/- 1.645 SD).
pred$Est_95 <- pred$Estimate + 1.645 * pred$SD
pred$Est_5  <- pred$Estimate - 1.645 * pred$SD

# Sort for presentation: season, then weekend/weekday, then intra-day period
pred <- pred[with(pred, order(season, weekend, peak3)), ]

# Presentation-friendly column names
names(pred) <- c('Season', 'Weekend/Day', 'Intra-Day Period',
                 'Estimate',
                 'StdDev', '95th Percentile', '5th Percentile')

knitr::kable(pred,
      caption = "Estimated counts of NOTAMS per hour by season, weekend/weekday, and intra-day period",
      digits = 2) %>% 
  kable_styling(bootstrap_options = c('striped','hover'))
Estimated counts of NOTAMS per hour by season, weekend/weekday, and intra-day period
Season Weekend/Day Intra-Day Period Estimate StdDev 95th Percentile 5th Percentile
1 Fall Weekday off-peak 28.31 1.43 30.67 25.95
3 Fall Weekday valley 11.89 1.44 14.25 9.52
5 Fall Weekday peak 56.70 1.43 59.06 54.34
2 Fall Weekend off-peak 17.99 1.75 20.87 15.11
4 Fall Weekend valley 10.32 1.76 13.21 7.43
6 Fall Weekend peak 23.01 1.75 25.89 20.13
7 Spring Weekday off-peak 29.82 1.42 32.16 27.49
9 Spring Weekday valley 14.47 1.42 16.81 12.12
11 Spring Weekday peak 58.21 1.42 60.55 55.88
8 Spring Weekend off-peak 19.50 1.74 22.36 16.64
10 Spring Weekend valley 12.90 1.75 15.77 10.03
12 Spring Weekend peak 24.52 1.74 27.38 21.65
13 Summer Weekday off-peak 32.84 1.43 35.19 30.50
15 Summer Weekday valley 13.33 1.43 15.68 10.99
17 Summer Weekday peak 59.21 1.42 61.55 56.86
14 Summer Weekend off-peak 22.52 1.74 25.39 19.66
16 Summer Weekend valley 11.77 1.74 14.63 8.90
18 Summer Weekend peak 25.51 1.74 28.37 22.65
19 Winter Weekday off-peak 35.81 1.48 38.24 33.38
21 Winter Weekday valley 18.50 1.48 20.93 16.07
23 Winter Weekday peak 80.25 1.48 82.68 77.82
20 Winter Weekend off-peak 25.49 1.78 28.43 22.56
22 Winter Weekend valley 16.93 1.79 19.88 13.99
24 Winter Weekend peak 46.55 1.78 49.49 43.62

We can use this model to then generate estimates at a higher level, for example for a typical weekend or weekday in a given season.

# Example: expected total NOTAMs over a full weekday in Fall.
# Off peak x 9 hours
# Valley x 6 hours
# Peak x 9 hours 
predx <- pred[pred[,1] == 'Fall' & pred[,2] == 'Weekday',]

# Daily total = sum over periods of (hourly estimate x hours in period)
ex = predx[predx[,3] == 'off-peak', 'Estimate'] * 9 +
  predx[predx[,3] == 'valley', 'Estimate'] * 6 + 
  predx[predx[,3] == 'peak', 'Estimate'] * 9

# NOTE(review): SDs are summed linearly here; independent errors would
# combine as sqrt(sum of variances), so this is a conservative (wider) bound.
err_ex = predx[predx[,3] == 'off-peak', 'StdDev'] * 9 +
  predx[predx[,3] == 'valley', 'StdDev'] * 6 + 
  predx[predx[,3] == 'peak', 'StdDev'] * 9

A typical weekday in fall, for example, would therefore be expected to have 836.5 +- 34.5 NOTAMs over the course of the day.

For all season x weekend/weekday combinations, the estimates over the course of a day are as follows:

# Hours per intra-day period, recovered from the factor levels of the
# renamed peak3 column (ordered off-peak, valley, peak -> 9, 6, 9 hours).
# NOTE(review): this assumes that level order -- confirm against pred_dat.
n_hr = pred$`Intra-Day Period` 
levels(n_hr) = c(9, 6, 9)
n_hr = as.numeric(as.character(n_hr))

# Daily totals by season x weekend/weekday: scale each hourly estimate by
# its period's hours, then sum the three periods.
pred_tab = pred %>%
  mutate(Est_day = Estimate * n_hr,
         SD_day = StdDev * n_hr) %>%
  group_by(Season, `Weekend/Day`) %>%
  summarize(Estimate = sum(Est_day),
            SD = sum(SD_day))    # NOTE(review): linear SD sum; independent errors would add in quadrature

# 5th / 95th percentiles assuming normality (+/- 1.645 SD)
pred_tab = pred_tab %>% 
  mutate(`95th Percentile` = Estimate + 1.645 * SD,
         `5th Percentile` = Estimate - 1.645 * SD)

kable(pred_tab,
      caption = "Estimated counts of NOTAMS per day by season and weekend/weekday",
      digits = 2) %>% 
  kable_styling(bootstrap_options = c('striped','hover'))
Estimated counts of NOTAMS per day by season and weekend/weekday
Season Weekend/Day Estimate SD 95th Percentile 5th Percentile
Fall Weekday 836.46 34.46 893.15 779.78
Fall Weekend 430.90 42.05 500.07 361.73
Spring Weekday 879.12 34.11 935.23 823.01
Spring Weekend 473.56 41.78 542.28 404.83
Summer Weekday 908.46 34.22 964.75 852.18
Summer Weekend 502.90 41.80 571.67 434.14
Winter Weekday 1155.53 35.46 1213.87 1097.20
Winter Weekend 749.97 42.85 820.47 679.48

For all intra-day period x weekend/weekday combinations, the estimates over the course of a day are as follows:

# Hours per intra-day period (off-peak, valley, peak -> 9, 6, 9), recovered
# from the factor levels of the renamed peak3 column
n_hr <- pred$`Intra-Day Period` 
levels(n_hr) <- c(9, 6, 9)
n_hr <- as.numeric(as.character(n_hr))

# Totals by intra-day period x weekend/weekday, summed across the seasons.
# BUG FIX: this previously grouped by Season and `Weekend/Day`, exactly
# duplicating the table above; the caption and the surrounding text say
# intra-day period, so group by `Intra-Day Period` instead.
pred_tab3 <- pred %>%
  mutate(Est_day = Estimate * n_hr,
         SD_day = StdDev * n_hr) %>%
  group_by(`Intra-Day Period`, `Weekend/Day`) %>%
  summarize(Estimate = sum(Est_day),
            SD = sum(SD_day))

# 5th / 95th percentiles assuming normality (+/- 1.645 SD)
pred_tab3 <- pred_tab3 %>% 
  mutate(`95th Percentile` = Estimate + 1.645 * SD,
         `5th Percentile` = Estimate - 1.645 * SD)

kable(pred_tab3,
      caption = "Estimated counts of NOTAMS per day by intra-day period and weekend/weekday",
      digits = 2) %>% 
  kable_styling(bootstrap_options = c('striped','hover'))
Estimated counts of NOTAMS per day by intra-day period and weekend/weekday
Season Weekend/Day Estimate SD 95th Percentile 5th Percentile
Fall Weekday 836.46 34.46 893.15 779.78
Fall Weekend 430.90 42.05 500.07 361.73
Spring Weekday 879.12 34.11 935.23 823.01
Spring Weekend 473.56 41.78 542.28 404.83
Summer Weekday 908.46 34.22 964.75 852.18
Summer Weekend 502.90 41.80 571.67 434.14
Winter Weekday 1155.53 35.46 1213.87 1097.20
Winter Weekend 749.97 42.85 820.47 679.48

Save the outputs for busy period work.

# Persist the prediction tables, hour designations, and the final model
# for the downstream busy-period analysis
save(pred, pred_tab, pred_tab3, hr_tab, m11,
     file = 'NOTAM_Freq_Model_Out.RData')